
* Turn off --more-- paging of output, then empty the data in memory before anything is loaded
set more off
clear

/*********************************************************************************
Name: data_availability.do

Data In: [Data/Original/patient_surveys_attempts.dta,
		  Data/Original/hw_roster.dta,
		  Data/Intermediate/verified_patients.dta,
		  Data/Original/tb_registers.dta,
		  Data/Original/observation_days_patients.dta,
		  Data/Original/observation_days_centers.dta,
		  Data/Original/hw_surveys.dta,
		  Data/Intermediate/hw_roster_by_hw.dta,
		  Data/Original/salary_slips.dta,
		  Data/Intermediate/hw_roster_by_center.dta,
		  Data/Original/spot_checks.dta,
		  Data/Original/centerwise_sheets.dta]

Data Out: []

Results Out: [Results/Appendix/TableA2_PanelA.out,
		      Results/Appendix/TableA2_PanelB.out,
		      Results/Appendix/TableA2_PanelC.out]

Purpose of do-file: Checking the balance in the availability of the various data sources across the treatment and control groups

Organization: PART-1: Checking the balance in the availability of patient-level data across the treatment and control groups
			  PART-2: Checking the balance in the availability of health-worker-level data across the treatment and control groups
			  PART-3: Checking the balance in the availability of center-level data across the treatment and control groups
*********************************************************************************/

* Change to the folder named by the global macro DIRECTORY (not set in this do-file, so it
* must be defined before running); the Data/ and Results/ paths below are relative to it
cd "$DIRECTORY"


****************************************
*** PART-1 *** Checking the balance in the availability of patient-level data across the treatment and control groups
****************************************

** Patient surveys and government register data

* Load the patient survey attempts and keep only rows that find a match in the
* health worker roster (matched on Unique_ID and UID_Center)

use "Data/Original/patient_surveys_attempts.dta", clear

merge m:1 Unique_ID UID_Center using "Data/Original/hw_roster.dta", keep(match) nogenerate

* Add the verified-patients file, matched on q1_ques_code and a02_entry_exit; unmatched rows
* from either side are retained and flagged in _mergeVerifiedPat

merge 1:1 q1_ques_code a02_entry_exit using "Data/Intermediate/verified_patients.dta", gen(_mergeVerifiedPat)

* Group the a02_entry_exit codes into three survey types: 1 stays 1, 2-3 become 2, 4-5 become 3

gen survey_type = a02_entry_exit
replace survey_type = 2 if inrange(survey_type, 2, 3)
replace survey_type = 3 if inrange(survey_type, 4, 5)

* Reduce to the variables needed below and go to one row per q1_ques_code, with one
* survey_complete column per survey type

keep q1_ques_code treatment final_stratum_id1-final_stratum_id13 survey_type survey_complete verified_patient post_exp uid_cluster q3_city_code q2_state_code expstartdate
reshape wide survey_complete, i(q1_ques_code) j(survey_type)

* A survey type with no row for a given q1_ques_code is missing after the reshape; treat it as not completed

foreach k of numlist 1/3 {
	replace survey_complete`k' = 0 if survey_complete`k' == .
}

* Completion indicators: any survey type, type 1 only, and type 2 or 3

gen any_survey_complete = inlist(1, survey_complete1, survey_complete2, survey_complete3)
gen entry_complete = survey_complete1 == 1
gen exit_complete = inlist(1, survey_complete2, survey_complete3)

* Consistency check: the verified_patient flag must coincide with having any completed survey

assert verified_patient == any_survey_complete

* Merging with TB registers data; register rows with no counterpart in the data in memory are discarded

merge 1:1 q1_ques_code using "Data/Original/tb_registers.dta", gen(_mergeVerification)
keep if _mergeVerification != 2

* TB register data counts as available when out_come_verify is not system missing

gen verification_available = out_come_verify != .

* Regressions: Table A2, Panel A
* Each column stores the control-group mean of the outcome, regresses the outcome on treatment
* with the stratum indicators and post_exp (standard errors clustered on uid_cluster), and
* exports only the treatment coefficient together with that mean.

local strata_fe final_stratum_id1-final_stratum_id13

* Survey completion outcomes among verified patients; the first column creates the output file,
* the second is appended to it
local write_mode replace
foreach outcome in entry_complete exit_complete {
	summarize `outcome' if treatment == 0 & verified_patient == 1
	local ctrl_mean = r(mean)
	ivreg2 `outcome' treatment `strata_fe' post_exp if verified_patient == 1, small cluster(uid_cluster)
	quietly outreg2 treatment using "Results/Appendix/TableA2_PanelA.out", `write_mode' nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `ctrl_mean') addtext(Strata fixed effects, Yes, Patient controls, No) adec(3) dec(3) keep(treatment)
	local write_mode append
}

* TB register availability: verified patients, leaving out rows that have both
* q3_city_code == 24 and q2_state_code == 13
local register_sample verified_patient == 1 & !(q3_city_code == 24 & q2_state_code == 13)
summarize verification_available if treatment == 0 & `register_sample'
local ctrl_mean = r(mean)
ivreg2 verification_available treatment `strata_fe' post_exp if `register_sample', small cluster(uid_cluster)
quietly outreg2 treatment using "Results/Appendix/TableA2_PanelA.out", append nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `ctrl_mean') addtext(Strata fixed effects, Yes, Patient controls, No) adec(3) dec(3) keep(treatment)


** Observation days
* Extends the patient-level data in memory with observation-day records and builds two
* per-patient measures: whether any qualifying observation exists, and how many.

* Calling and merging datasets

* One patient row can match several observation-day rows (1:m), so the data become
* patient x observation after this merge. Observation rows with no patient in memory are dropped.
merge 1:m q1_ques_code using "Data/Original/observation_days_patients.dta", gen(_mergeMonitoring)
drop if _mergeMonitoring == 2

* Build a lookup of one visit_date per unique_mon_instance from the center-level file,
* without disturbing the patient data (preserve/restore)
preserve

	use "Data/Original/observation_days_centers.dta", clear
	keep unique_mon_instance visit_date
	
	* Keep a single row per unique_mon_instance. Which row survives depends on the sort's
	* tie-breaking; presumably visit_date is constant within an instance - TODO confirm
	so unique_mon_instance
	by unique_mon_instance: gen id = _n
	keep if id == 1 
	drop id

	tempfile monitoring_date
	save `monitoring_date'

restore

* Attach visit_date to each observation row. No rows are dropped here, so instances found only
* in the lookup stay in the data; all their other variables, including q1_ques_code, are missing.
merge m:1 unique_mon_instance using `monitoring_date'

* Constructing variables to measure observation days data availability for patients

* pat_id numbers the rows within each patient; pat_id == 1 is later used to keep one row per
* patient. Rows with missing q1_ques_code get a missing pat_id.
bys q1_ques_code: gen pat_id = _n if !missing(q1_ques_code)
* Row-level flag: patient_row is 1 or 3 and the visit is not before expstartdate
* (the meaning of the patient_row codes is not visible in this file).
* NOTE(review): a missing visit_date compares as larger than any date in Stata, so a row with
* patient_row 1 or 3 but no matched visit_date is also flagged - confirm this is intended.
gen temp = (patient_row == 1 | patient_row == 3) & visit_date >= expstartdate
so q1_ques_code
* Patient-level indicator: 1 if any of the patient's rows is flagged
by q1_ques_code: egen monitoring_data_present_any = max(temp)
drop temp

* obs tags the first row of each patient x instance x patient_row combination, then is zeroed
* for repeated rows, rows without an instance id, patient_row == 2, and visits before expstartdate.
* NOTE(review): rows with a missing visit_date are not zeroed by the date condition - confirm.
so q1_ques_code unique_mon_instance patient_row
by q1_ques_code unique_mon_instance patient_row: gen obs = _n
replace obs = 0 if obs ~= 1 | unique_mon_instance == . | patient_row == 2 | visit_date < expstartdate
so q1_ques_code
* Patient-level count of the remaining tagged rows
by q1_ques_code: egen monitoring_data_present_nb = total(obs)

* Regressions: Table A2, Panel A (continued)
* Both outcomes are patient-level, so the sample keeps one row per patient (pat_id == 1)
* among verified patients. Columns are appended to the existing Panel A file; the indicator
* is reported with 3 decimals and the count with 1.

local strata_fe final_stratum_id1-final_stratum_id13
local patient_sample pat_id == 1 & verified_patient == 1

local digits 3
foreach outcome in monitoring_data_present_any monitoring_data_present_nb {
	summarize `outcome' if treatment == 0 & `patient_sample'
	local ctrl_mean = r(mean)
	ivreg2 `outcome' treatment `strata_fe' post_exp if `patient_sample', small cluster(uid_cluster)
	quietly outreg2 treatment using "Results/Appendix/TableA2_PanelA.out", append nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `ctrl_mean') addtext(Strata fixed effects, Yes, Patient controls, No) adec(`digits') dec(`digits') keep(treatment)
	local digits 1
}


****************************************
*** PART-2 *** Checking the balance in the availability of health-worker-level data across the treatment and control groups
****************************************

** Health worker surveys

* Load the health worker survey file and attach the health-worker-level roster, one row per
* Unique_ID on both sides; the merge stops with an error unless every row is matched

use "Data/Original/hw_surveys.dta", clear

merge 1:1 Unique_ID using "Data/Intermediate/hw_roster_by_hw.dta", assert(match) nogenerate

* Survey availability indicators: the midline and endline indicators are 1 when either the
* regular or the "Plus" data flag equals 1

gen baseline = Baseline_Data == 1
gen any_midline = inlist(1, Midline_Data, Midline_Plus_Data)
gen any_endline = inlist(1, Endline_Data, Endline_Plus_Data)

* Regressions: Table A2, Panel B
* For each survey round: control-group mean of the availability indicator, then the indicator
* regressed on treatment and the stratum indicators with standard errors clustered on
* uid_cluster. The first column creates the Panel B file and the others are appended.

local strata_fe final_stratum_id1-final_stratum_id13

local write_mode replace
foreach outcome in baseline any_midline any_endline {
	summarize `outcome' if treatment == 0
	local ctrl_mean = r(mean)
	ivreg2 `outcome' treatment `strata_fe', small cluster(uid_cluster)
	quietly outreg2 treatment using "Results/Appendix/TableA2_PanelB.out", `write_mode' nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `ctrl_mean') addtext(Strata fixed effects, Yes, Patient controls, No) adec(3) dec(3) keep(treatment)
	local write_mode append
}


** Salary data

* Load the salary slips (several rows per Unique_ID) and attach the health-worker-level
* roster; the merge stops with an error unless every row on both sides is matched

use "Data/Original/salary_slips.dta", clear

merge m:1 Unique_ID using "Data/Intermediate/hw_roster_by_hw.dta", assert(match) nogenerate

* salary_data_months = number of salary rows for the health worker;
* counselor_n = position of the row within the health worker, ordered by MonthIntoExp

sort Unique_ID MonthIntoExp
by Unique_ID: gen salary_data_months = _N
by Unique_ID: gen counselor_n = _n

* Regressions: Table A2, Panel B (continued)
* salary_data_months is constant within Unique_ID, so the sample keeps one row per
* health worker (counselor_n == 1). The column is appended to the Panel B file with 1 decimal.

local strata_fe final_stratum_id1-final_stratum_id13
local hw_sample counselor_n == 1

summarize salary_data_months if treatment == 0 & `hw_sample'
local ctrl_mean = r(mean)
ivreg2 salary_data_months treatment `strata_fe' if `hw_sample', small cluster(uid_cluster)
quietly outreg2 treatment using "Results/Appendix/TableA2_PanelB.out", append nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `ctrl_mean') addtext(Strata fixed effects, Yes, Patient controls, No) adec(1) dec(1) keep(treatment)


****************************************
*** PART-3 *** Checking the balance in the availability of center-level data across the treatment and control groups
****************************************

** Observation days
* Builds, for each center, the number of distinct observation instances conducted on or after
* the experiment start date and an indicator for having at least one such instance.

* Calling and merging datasets

use "Data/Original/observation_days_centers.dta",clear

* Attach the center-level roster; the assert requires every row on both sides to be matched
merge m:1 UID_Center using "Data/Intermediate/hw_roster_by_center.dta", gen(_mergeCenterData)
assert _mergeCenterData == 3
drop _mergeCenterData

* Creating variable to identify visits conducted before experiment start
* (a missing visit_date compares as larger than any date, so it is not flagged as before)

gen before_exp = visit_date < expstartdate

* Constructing variables to measure observation days data availability for centers

* id tags the first row of each unique_mon_instance; it is set to 0 for repeated rows of the
* same instance and for instances before the experiment start, so summing it counts instances
so unique_mon_instance
by unique_mon_instance: gen id = _n
replace id = 0 if id ~= 1 | before_exp == 1
bys UID_Center: egen N_instances = total(id)
* The count is a sum of 0/1 tags and therefore never negative: the indicator has to test for a
* strictly positive count, otherwise it equals 1 for every center
gen any_monitoring = (N_instances > 0)
* Row index within center; visit_number == 1 is used to keep one row per center
bys UID_Center: gen visit_number = _n

* Regressions: Table A2, Panel C
* Center-level outcomes, one row per center (visit_number == 1): control-group mean, then the
* outcome regressed on treatment and the stratum indicators with standard errors clustered on
* uid_cluster. The indicator creates the Panel C file with 3 decimals; the count is appended
* with 1 decimal.

local strata_fe final_stratum_id1-final_stratum_id13

local write_mode replace
local digits 3
foreach outcome in any_monitoring N_instances {
	summarize `outcome' if visit_number == 1 & treatment == 0
	local ctrl_mean = r(mean)
	ivreg2 `outcome' treatment `strata_fe' if visit_number == 1, small cluster(uid_cluster)
	quietly outreg2 treatment using "Results/Appendix/TableA2_PanelC.out", `write_mode' nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `ctrl_mean') addtext(Strata fixed effects, Yes, Patient controls, No) adec(`digits') dec(`digits') keep(treatment)
	local write_mode append
	local digits 1
}


** Random spot checks

* Load the spot checks and merge them with the roster on UID_Center and Unique_ID. Roster rows
* without any spot check stay in the data (_mergeRandom == 2); a spot check without a roster
* row (_mergeRandom == 1) stops the do-file.

use "Data/Original/spot_checks.dta", clear

merge m:1 UID_Center Unique_ID using "Data/Original/hw_roster.dta", gen(_mergeRandom)
assert _mergeRandom != 1

* Assemble the visit date from its month, day and year components and check that no spot
* check is dated before expstartdate

gen visit_date = mdy(visit_date_mo, visit_date_dd, visit_date_yyyy)
gen before_exp = visit_date < expstartdate
assert !before_exp

* Center-level measures: center_n indexes rows within center (center_n == 1 keeps one row per
* center), N_random_visits counts the matched spot-check rows of the center, and
* any_random_visit is 1 when the center has at least one matched row and 0 otherwise

sort UID_Center
by UID_Center: gen center_n = _n
by UID_Center: egen N_random_visits = total(_mergeRandom == 3)
by UID_Center: egen any_random_visit = max(_mergeRandom == 3)

* Regressions: Table A2, Panel C (continued)
* Spot-check outcomes on one row per center (center_n == 1), appended to the Panel C file;
* the indicator is reported with 3 decimals and the count with 1.

local strata_fe final_stratum_id1-final_stratum_id13

local digits 3
foreach outcome in any_random_visit N_random_visits {
	summarize `outcome' if treatment == 0 & center_n == 1
	local ctrl_mean = r(mean)
	ivreg2 `outcome' treatment `strata_fe' if center_n == 1, small cluster(uid_cluster)
	quietly outreg2 treatment using "Results/Appendix/TableA2_PanelC.out", append nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `ctrl_mean') addtext(Strata fixed effects, Yes, Patient controls, No) adec(`digits') dec(`digits') keep(treatment)
	local digits 1
}


** Center-wise data sheets
* Counts, for each center, the center-wise sheet rows with MonthIntoExp below 101.

* Calling and merging datasets

use "Data/Original/centerwise_sheets.dta", clear

* The merge indicator is named _mergeCenterData and is referred to by its full name below,
* so the code does not depend on Stata's variable-abbreviation setting
merge m:1 UID_Center using "Data/Intermediate/hw_roster_by_center.dta", gen(_mergeCenterData)
ta _mergeCenterData

* Constructing variables to measure center-wise data sheets availability

* Row-level flag for a matched sheet row with a non-missing MonthIntoExp below 101
* NOTE(review): the reason for the 101 cutoff is not visible in this file - confirm
gen temp = (MonthIntoExp<101 & MonthIntoExp ~= . & _mergeCenterData == 3)
so UID_Center
by UID_Center: egen nb_obs_centerwise = total(temp)

* Only matched rows are kept
* NOTE(review): roster centers without any sheet row (_mergeCenterData == 2) are dropped here
* rather than entering the regression with a count of 0 - confirm this is intended
keep if _mergeCenterData == 3
so UID_Center
* Row index within center; center_n == 1 is used to keep one row per center
by UID_Center: gen center_n = _n

* Regressions: Table A2, Panel C (continued)
* nb_obs_centerwise is constant within center, so the sample keeps one row per center
* (center_n == 1). The column is appended to the Panel C file with 1 decimal.

local strata_fe final_stratum_id1-final_stratum_id13
local center_sample center_n == 1

summarize nb_obs_centerwise if treatment == 0 & `center_sample'
local ctrl_mean = r(mean)
ivreg2 nb_obs_centerwise treatment `strata_fe' if `center_sample', small cluster(uid_cluster)
quietly outreg2 treatment using "Results/Appendix/TableA2_PanelC.out", append nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `ctrl_mean') addtext(Strata fixed effects, Yes, Patient controls, No) adec(1) dec(1) keep(treatment)
